###Replication Script###

######### PREPARATION############

##Preparation##
library("haven")
library("foreign")
library("dplyr")
library("psych")
library("VIM")
library("regclass")
library("gplots")
library("psych")
library(Hmisc)

#set working directory

# Verify wd
getwd()

#import data
spss_data <- read_sav("Data replicatie 0312.sav")
summary(spss_data)


######## CREATE BELFIUS CODE FOR EACH MUNICIPALITY ########
##Recode string variable 'municipality' into numeric variable 'belfius_type'
spss_data <- mutate(spss_data, belfius_type = case_when(
  spss_data$Municipality %in% c("Hove", "Kapellen", "Schilde", "Zoersel", "Bonheiden", "Keerbergen", "Lubbeek",
                                "Oud-Heverlee", "De Pinte", "Sint-Martens-Latem") ~ 1,
  
  spss_data$Municipality %in% c("Hoeilaart", "Overijse", "Steenokkerzeel","Kraainem", "Linkebeek", "Sint-Genesius-Rode", "Wemmel", 
                                "Wezembeek-Oppem", "Bierbeek", "Herent", "Kortenberg", "Tervuren", "Melle", "Merelbeke") ~ 2,
  
  spss_data$Municipality %in% c("Boechout", "Brasschaat","Brecht", "Kalmthout", "Lint", "Ranst","Rumst","Vosselaar","Kampenhout","Kapelle-op-den-Bos",
                                "Londerzeel","Meise","Merchtem","Opwijk","Ternat","Zemst","Roosdaal","Affligem",
                                "Begijnendijk","Bertem","Boortmeerbeek","Boutersem","Haacht","Hoegaarden","Holsbeek","Huldenberg","Rotselaar","Tremelo",
                                " Tielt-Winge","Erpe-Mere","Buggenhout","Laarne","Destelbergen","Diepenbeek","Zonhoven","Alken","Herk-de-Stad") ~ 3,
  
  spss_data$Municipality %in% c("Sint-Amands", "Baarle-Hertog","Vorselaar","Liedekerke","Mesen","Avelgem","Sint-Lievens-Houtem","Berlare","Moerbeke",
                                "Wachtebeke","Zingem","Brakel","Kluisbergen","Lierde","Gingelom","Halen","Ham","Heers","Essen","Stabroek",
                                "Nijlen","Landen","Wervik","Haaltert","Herzele","Lede","Lebbeke","Kruibeke","Sint-Gillis-Waas","Dilsen-Stokkem") ~ 4,
  
  spss_data$Municipality %in% c("Berlaar", "Putte","Arendonk","Balen","Herenthout","Herselt","Hulshout","Lille","Meerhout","Oud-Turnhout",
                                "Retie","Geetbets","Kortenaken","Zoutleeuw","Linter","Scherpenheuvel-Zichem","Wichelen","Lovendegem",
                                "Waarschoot","Stekene","As","Nieuwerkerken","Zutendaal","Neerpelt","Hechtel-Eksel","Meeuwen-Gruitrode",
                                "Hoeselt","Kortessem","Riemst","Wellen") ~ 5,
  
  spss_data$Municipality %in% c("Zandhoven", "Kasterlee","Bever","Galmaarden","Gooik","Herne","Pepingen","Lennik","Bekkevoort",
                                "Glabbeek","Damme","Jabbeke","Zuienkerke","Waasmunster","Gavere","Lochristi","Nevele","Oosterzele",
                                "Wortegem-Petegem","Horebeke","Maarkedal","Zwalm") ~ 6,
  
  spss_data$Municipality %in% c("Rijkevorsel", "Kortemark","Lo-Reninge","Zonnebeke","Langemark-Poelkapelle","Spiere-Helkijn",
                                "Hooglede","Lichtervelde","Moorslede","Staden","Meulebeke","Pittem","Wingene","Ardooie",
                                "Nazareth","Zulte","Kruishoutem") ~ 7,

  spss_data$Municipality %in% c("Wuustwezel", "Merksplas","Ravels","Beernem","Houthulst","Koekelare","Heuvelland","Vleteren"
                                ,"Gistel","Ichtegem","Oudenburg","Ledegem","Dentergem","Ruiselede","Alveringem","Assenede","Kaprijke",
                                 "Maldegem","Sint-Laureins","Knesselare","Zomergem","Bocholt","Kinrooi","Peer","Hamont-Achel","Borgloon",
                                "Herstappe","Voeren") ~ 8,

  spss_data$Municipality %in% c("Kontich", "Mortsel","Zwijndrecht","Asse","Beersel","Dilbeek","Grimbergen","Halle","Machelen","Sint-Pieters-Leeuw",
                                "Vilvoorde","Zaventem","Drogenbos") ~ 9,
  
  spss_data$Municipality %in% c("Aartselaar", "Schelle","Wommelgem","Puurs","Beerse","Dessel","Grobbendonk","Olen","Laakdal","Anzegem","Deerlijk",
                                "Lendelede","Ingelmunster","Oostrozebeke","Wielsbeke","Lummen","Opglabbeek","Tessenderlo") ~ 10,
  
  spss_data$Municipality %in% c("Bornem", "Westerlo","Oostkamp","Zedelgem","Wevelgem","Zwevegem","Zele","Aalter","Beringen","Heusden-Zolder",
                                "Houthalen-Helchteren") ~ 11,
  
  spss_data$Municipality %in% c("Edegem", "Wijnegem","Malle","Duffel","Sint-Katelijne-Waver","Hoogstraten","Diest","Diksmuide",
                                "Poperinge","Kuurne","Tielt","Veurne","Wetteren","Eeklo","Bree","Overpelt","Schoten","Lier","Geel",
                                "Herentals","Tienen","Ieper","Harelbeke","Waregem","Izegem","Dendermonde","Deinze","Oudenaarde",
                                "Beveren","Lokeren","Sint-Truiden","Lommel") ~ 12,

  spss_data$Municipality %in% c("Mol", "Aarschot","Torhout","Zottegem","Evergem","Maaseik","Bilzen","Lanaken","Maasmechelen","Tongeren",
                                "Heist-op-den-Berg") ~ 13,
  
  spss_data$Municipality %in% c("Boom", "Borsbeek","Hemiksem","Niel","Willebroek","Menen","Denderleeuw","Geraardsbergen",
                                "Ninove","Hamme","Zelzate","Ronse","Temse","Leopoldsburg") ~ 14,
  
  spss_data$Municipality %in% c("Antwerpen", "Mechelen","Turnhout","Leuven","Kortrijk","Brugge","Oostende","Roeselare",
                                "Aalst","Gent","Sint-Niklaas","Genk","Hasselt") ~ 15,
  
  
  spss_data$Municipality %in% c("Blankenberge", "Bredene", "De Haan","De Panne","Knokke-Heist","Koksijde","Middelkerke", "Nieuwpoort") ~ 16,
  
  TRUE ~ as.numeric(NA) 
))



###### CREATE VARIABLE percentage_unemployed ###############
spss_data <- mutate(spss_data, perc_unemployed = case_when(
  spss_data$Municipality == "Aalst" ~ 0.0742,
  spss_data$Municipality == "Aalter" ~ 0.0342,
  spss_data$Municipality == "Aarschot" ~ 0.0511,
  spss_data$Municipality == "Aartselaar" ~ 0.0425,
  spss_data$Municipality == "Affligem" ~ 0.0381,
  spss_data$Municipality == "Alken" ~ 0.0434,
  spss_data$Municipality == "Alveringem" ~ 0.0341,
  spss_data$Municipality == "Antwerpen" ~ 0.1416,
  spss_data$Municipality == "Anzegem" ~ 0.0283,
  spss_data$Municipality == "Ardooie" ~ 0.0351,
  spss_data$Municipality == "Arendonk" ~ 0.0553,
  spss_data$Municipality == "As" ~ 0.0552,
  spss_data$Municipality == "Asse" ~ 0.061,
  spss_data$Municipality == "Assenede" ~ 0.0422,
  spss_data$Municipality == "Avelgem" ~ 0.0461,
  spss_data$Municipality == "Baarle-Hertog" ~ 0.0629,
  spss_data$Municipality == "Balen" ~ 0.0481,
  spss_data$Municipality == "Beernem" ~ 0.0329,
  spss_data$Municipality == "Beerse" ~ 0.0461,
  spss_data$Municipality == "Beersel" ~ 0.0437,
  spss_data$Municipality == "Begijnendijk" ~ 0.0368,
  spss_data$Municipality == "Bekkevoort" ~ 0.0452,
  spss_data$Municipality == "Beringen" ~ 0.0605,
  spss_data$Municipality == "Berlaar" ~ 0.0632,
  spss_data$Municipality == "Berlare" ~ 0.0454,
  spss_data$Municipality == "Bertem" ~ 0.0401,
  spss_data$Municipality == "Bever" ~ 0.0434,
  spss_data$Municipality == "Beveren" ~ 0.0559,
  spss_data$Municipality == "Bierbeek" ~ 0.0452,
  spss_data$Municipality == "Bilzen" ~ 0.059,
  spss_data$Municipality == "Blankenberge" ~ 0.098,
  spss_data$Municipality == "Bocholt" ~ 0.0444,
  spss_data$Municipality == "Boechout" ~ 0.0529,
  spss_data$Municipality == "Bonheiden" ~ 0.0445,
  spss_data$Municipality == "Boom" ~ 0.0892,
  spss_data$Municipality == "Boortmeerbeek" ~ 0.0344,
  spss_data$Municipality == "Borgloon" ~ 0.0534,
  spss_data$Municipality == "Bornem" ~ 0.0486,
  spss_data$Municipality == "Borsbeek" ~ 0.0932,
  spss_data$Municipality == "Boutersem" ~ 0.0388,
  spss_data$Municipality == "Brakel" ~ 0.0387,
  spss_data$Municipality == "Brasschaat" ~ 0.055,
  spss_data$Municipality == "Brecht" ~ 0.0446,
  spss_data$Municipality == "Bredene" ~ 0.0644,
  spss_data$Municipality == "Bree" ~ 0.0489,
  spss_data$Municipality == "Brugge" ~ 0.0647,
  spss_data$Municipality == "Buggenhout" ~ 0.0385,
  spss_data$Municipality == "Damme" ~ 0.0395,
  spss_data$Municipality == "De Haan" ~ 0.0633,
  spss_data$Municipality == "De Panne" ~ 0.0813,
  spss_data$Municipality == "De Pinte" ~ 0.0338,
  spss_data$Municipality == "Deerlijk" ~ 0.0385,
  spss_data$Municipality == "Deinze" ~ 0.042,
  spss_data$Municipality == "Denderleeuw" ~ 0.0622,
  spss_data$Municipality == "Dendermonde" ~ 0.0694,
  spss_data$Municipality == "Dentergem" ~ 0.0301,
  spss_data$Municipality == "Dessel" ~ 0.0501,
  spss_data$Municipality == "Destelbergen" ~ 0.0404,
  spss_data$Municipality == "Diepenbeek" ~ 0.0477,
  spss_data$Municipality == "Diest" ~ 0.0665,
  spss_data$Municipality == "Diksmuide" ~ 0.0431,
  spss_data$Municipality == "Dilbeek" ~ 0.0523,
  spss_data$Municipality == "Dilsen-Stokkem" ~ 0.0686,
  spss_data$Municipality == "Drogenbos" ~ 0.0737,
  spss_data$Municipality == "Duffel" ~ 0.0539,
  spss_data$Municipality == "Edegem" ~ 0.0618,
  spss_data$Municipality == "Eeklo" ~ 0.0841,
  spss_data$Municipality == "Erpe-Mere" ~ 0.0477,
  spss_data$Municipality == "Essen" ~ 0.0529,
  spss_data$Municipality == "Evergem" ~ 0.0419,
  spss_data$Municipality == "Galmaarden" ~ 0.034,
  spss_data$Municipality == "Gavere" ~ 0.0318,
  spss_data$Municipality == "Geel" ~ 0.0587,
  spss_data$Municipality == "Geetbets" ~ 0.0466,
  spss_data$Municipality == "Genk" ~ 0.0934,
  spss_data$Municipality == "Gent" ~ 0.1028,
  spss_data$Municipality == "Geraardsbergen" ~ 0.0752,
  spss_data$Municipality == "Gingelom" ~ 0.0508,
  spss_data$Municipality == "Gistel" ~ 0.0414,
  spss_data$Municipality == "Glabbeek" ~ 0.0329,
  spss_data$Municipality == "Gooik" ~ 0.0351,
  spss_data$Municipality == "Grimbergen" ~ 0.0591,
  spss_data$Municipality == "Grobbendonk" ~ 0.0458,
  spss_data$Municipality == "Haacht" ~ 0.0432,
  spss_data$Municipality == "Haaltert" ~ 0.0368,
  spss_data$Municipality == "Halen" ~ 0.0484,
  spss_data$Municipality == "Halle" ~ 0.059,
  spss_data$Municipality == "Ham" ~ 0.0451,
  spss_data$Municipality == "Hamme" ~ 0.0539,
  spss_data$Municipality == "Hamont-Achel" ~ 0.0534,
  spss_data$Municipality == "Harelbeke" ~ 0.0502,
  spss_data$Municipality == "Hasselt" ~ 0.0814,
  spss_data$Municipality == "Hechtel-Eksel" ~ 0.0504,
  spss_data$Municipality == "Heers" ~ 0.0508,
  spss_data$Municipality == "Heist-op-den-Berg" ~ 0.0461,
  spss_data$Municipality == "Hemiksem" ~ 0.0682,
  spss_data$Municipality == "Herent" ~ 0.0492,
  spss_data$Municipality == "Herentals" ~ 0.0642,
  spss_data$Municipality == "Herenthout" ~ 0.0458,
  spss_data$Municipality == "Herk-de-Stad" ~ 0.0466,
  spss_data$Municipality == "Herne" ~ 0.0366,
  spss_data$Municipality == "Herselt" ~ 0.0402,
  spss_data$Municipality == "Herstappe" ~ NA,
  spss_data$Municipality == "Herzele" ~ 0.0435,
  spss_data$Municipality == "Heusden-Zolder" ~ 0.0688,
  spss_data$Municipality == "Heuvelland" ~ 0.0371,
  spss_data$Municipality == "Hoegaarden" ~ 0.0442,
  spss_data$Municipality == "Hoeilaart" ~ 0.0481,
  spss_data$Municipality == "Hoeselt" ~ 0.0517,
  spss_data$Municipality == "Holsbeek" ~ 0.0293,
  spss_data$Municipality == "Hooglede" ~ 0.0358,
  spss_data$Municipality == "Hoogstraten" ~ 0.0597,
  spss_data$Municipality == "Horebeke" ~ 0.0302,
  spss_data$Municipality == "Houthalen-Helchteren" ~ 0.0737,
  spss_data$Municipality == "Houthulst" ~ 0.0328,
  spss_data$Municipality == "Hove" ~ 0.0416,
  spss_data$Municipality == "Huldenberg" ~ 0.0296,
  spss_data$Municipality == "Hulshout" ~ 0.0392,
  spss_data$Municipality == "Ichtegem" ~ 0.0359,
  spss_data$Municipality == "Ieper" ~ 0.0648,
  spss_data$Municipality == "Ingelmunster" ~ 0.0435,
  spss_data$Municipality == "Izegem" ~ 0.0493,
  spss_data$Municipality == "Jabbeke" ~ 0.0326,
  spss_data$Municipality == "Kalmthout" ~ 0.0429,
  spss_data$Municipality == "Kampenhout" ~ 0.0351,
  spss_data$Municipality == "Kapellen" ~ 0.0478,
  spss_data$Municipality == "Kapelle-op-den-Bos" ~ 0.036,
  spss_data$Municipality == "Kaprijke" ~ 0.0354,
  spss_data$Municipality == "Kasterlee" ~ 0.0463,
  spss_data$Municipality == "Keerbergen" ~ 0.0351,
  spss_data$Municipality == "Kinrooi" ~ 0.0475,
  spss_data$Municipality == "Kluisbergen" ~ 0.042,
  spss_data$Municipality == "Knokke-Heist" ~ 0.0491,
  spss_data$Municipality == "Koekelare" ~ 0.0309,
  spss_data$Municipality == "Koksijde" ~ 0.0595,
  spss_data$Municipality == "Kontich" ~ 0.0469,
  spss_data$Municipality == "Kortemark" ~ 0.0302,
  spss_data$Municipality == "Kortenaken" ~ 0.0319,
  spss_data$Municipality == "Kortenberg" ~ 0.0439,
  spss_data$Municipality == "Kortessem" ~ 0.0531,
  spss_data$Municipality == "Kortrijk" ~ 0.0789,
  spss_data$Municipality == "Kraainem" ~ 0.0497,
  spss_data$Municipality == "Kruibeke" ~ 0.0551,
  spss_data$Municipality == "Kruisem" ~ 0.028,
  spss_data$Municipality == "Kuurne" ~ 0.0513,
  spss_data$Municipality == "Laakdal" ~ 0.0392,
  spss_data$Municipality == "Laarne" ~ 0.0332,
  spss_data$Municipality == "Lanaken" ~ 0.0621,
  spss_data$Municipality == "Landen" ~ 0.0559,
  spss_data$Municipality == "Langemark-Poelkapelle" ~ 0.0344,
  spss_data$Municipality == "Lebbeke" ~ 0.0436,
  spss_data$Municipality == "Lede" ~ 0.0453,
  spss_data$Municipality == "Ledegem" ~ 0.0276,
  spss_data$Municipality == "Lendelede" ~ 0.0413,
  spss_data$Municipality == "Lennik" ~ 0.0349,
  spss_data$Municipality == "Leopoldsburg" ~ 0.0731,
  spss_data$Municipality == "Leuven" ~ 0.0788,
  spss_data$Municipality == "Lichtervelde" ~ 0.0272,
  spss_data$Municipality == "Liedekerke" ~ 0.0534,
  spss_data$Municipality == "Lier" ~ 0.0775,
  spss_data$Municipality == "Lierde" ~ 0.0371,
  spss_data$Municipality == "Lievegem" ~ 0.0403,
  spss_data$Municipality == "Lille" ~ 0.0466,
  spss_data$Municipality == "Linkebeek" ~ 0.0761,
  spss_data$Municipality == "Lint" ~ 0.0461,
  spss_data$Municipality == "Linter" ~ 0.0483,
  spss_data$Municipality == "Lochristi" ~ 0.0352,
  spss_data$Municipality == "Lokeren" ~ 0.0623,
  spss_data$Municipality == "Lommel" ~ 0.0574,
  spss_data$Municipality == "Londerzeel" ~ 0.0352,
  spss_data$Municipality == "Lo-Reninge" ~ 0.0212,
  spss_data$Municipality == "Lubbeek" ~ 0.0355,
  spss_data$Municipality == "Lummen" ~ 0.0405,
  spss_data$Municipality == "Maarkedal" ~ 0.0237,
  spss_data$Municipality == "Maaseik" ~ 0.0574,
  spss_data$Municipality == "Maasmechelen" ~ 0.0818,
  spss_data$Municipality == "Machelen" ~ 0.0813,
  spss_data$Municipality == "Maldegem" ~ 0.0393,
  spss_data$Municipality == "Malle" ~ 0.0462,
  spss_data$Municipality == "Mechelen" ~ 0.0884,
  spss_data$Municipality == "Meerhout" ~ 0.0478,
  spss_data$Municipality == "Meise" ~ 0.0446,
  spss_data$Municipality == "Melle" ~ 0.0469,
  spss_data$Municipality == "Menen" ~ 0.0808,
  spss_data$Municipality == "Merchtem" ~ 0.0424,
  spss_data$Municipality == "Merelbeke" ~ 0.0445,
  spss_data$Municipality == "Merksplas" ~ 0.079,
  spss_data$Municipality == "Mesen" ~ 0.062,
  spss_data$Municipality == "Meulebeke" ~ 0.0307,
  spss_data$Municipality == "Middelkerke" ~ 0.0646,
  spss_data$Municipality == "Moerbeke" ~ 0.0377,
  spss_data$Municipality == "Mol" ~ 0.0627,
  spss_data$Municipality == "Moorslede" ~ 0.0365,
  spss_data$Municipality == "Mortsel" ~ 0.0767,
  spss_data$Municipality == "Nazareth" ~ 0.0382,
  spss_data$Municipality == "Niel" ~ 0.0538,
  spss_data$Municipality == "Nieuwerkerken" ~ 0.0399,
  spss_data$Municipality == "Nieuwpoort" ~ 0.0737,
  spss_data$Municipality == "Nijlen" ~ 0.0449,
  spss_data$Municipality == "Ninove" ~ 0.0567,
  spss_data$Municipality == "Olen" ~ 0.048,
  spss_data$Municipality == "Oostende" ~ 0.1205,
  spss_data$Municipality == "Oosterzele" ~ 0.0281,
  spss_data$Municipality == "Oostkamp" ~ 0.0338,
  spss_data$Municipality == "Oostrozebeke" ~ 0.0403,
  spss_data$Municipality == "Opwijk" ~ 0.0414,
  spss_data$Municipality == "Oudenaarde" ~ 0.0593,
  spss_data$Municipality == "Oudenburg" ~ 0.0426,
  spss_data$Municipality == "Oud-Heverlee" ~ 0.0397,
  spss_data$Municipality == "Oudsbergen" ~ 0.0423,
  spss_data$Municipality == "Oud-Turnhout" ~ 0.0515,
  spss_data$Municipality == "Overijse" ~ 0.0382,
  spss_data$Municipality == "Peer" ~ 0.0408,
  spss_data$Municipality == "Pelt" ~ 0.0528,
  spss_data$Municipality == "Pepingen" ~ 0.0299,
  spss_data$Municipality == "Pittem" ~ 0.0354,
  spss_data$Municipality == "Poperinge" ~ 0.046,
  spss_data$Municipality == "Putte" ~ 0.039,
  spss_data$Municipality == "Puurs-Sint-Amands" ~ 0.0393,
  spss_data$Municipality == "Ranst" ~ 0.039,
  spss_data$Municipality == "Ravels" ~ 0.0534,
  spss_data$Municipality == "Riemst" ~ 0.052,
  spss_data$Municipality == "Retie" ~ 0.0483,
  spss_data$Municipality == "Rijkevorsel" ~ 0.0378,
  spss_data$Municipality == "Roeselare" ~ 0.0748,
  spss_data$Municipality == "Ronse" ~ 0.1055,
  spss_data$Municipality == "Roosdaal" ~ 0.0337,
  spss_data$Municipality == "Rotselaar" ~ 0.0323,
  spss_data$Municipality == "Ruiselede" ~ 0.0337,
  spss_data$Municipality == "Rumst" ~ 0.0404,
  spss_data$Municipality == "Schelle" ~ 0.0405,
  spss_data$Municipality == "Scherpenheuvel-Zichem" ~ 0.0525,
  spss_data$Municipality == "Schilde" ~ 0.0424,
  spss_data$Municipality == "Schoten" ~ 0.0546,
  spss_data$Municipality == "Sint-Genesius-Rode" ~ 0.0554,
  spss_data$Municipality == "Sint-Gillis-Waas" ~ 0.0494,
  spss_data$Municipality == "Sint-Katelijne-Waver" ~ 0.0421,
  spss_data$Municipality == "Sint-Laureins" ~ 0.0506,
  spss_data$Municipality == "Sint-Lievens-Houtem" ~ 0.0321,
  spss_data$Municipality == "Sint-Martens-Latem" ~ 0.0346,
  spss_data$Municipality == "Sint-Niklaas" ~ 0.099,
  spss_data$Municipality == "Sint-Pieters-Leeuw" ~ 0.0622,
  spss_data$Municipality == "Sint-Truiden" ~ 0.0697,
  spss_data$Municipality == "Spiere-Helkijn" ~ 0.0534,
  spss_data$Municipality == "Stabroek" ~ 0.0455,
  spss_data$Municipality == "Staden" ~ 0.0368,
  spss_data$Municipality == "Steenokkerzeel" ~ 0.0426,
  spss_data$Municipality == "Stekene" ~ 0.0459,
  spss_data$Municipality == "Temse" ~ 0.0654,
  spss_data$Municipality == "Ternat" ~ 0.0378,
  spss_data$Municipality == "Tervuren" ~ 0.0518,
  spss_data$Municipality == "Tessenderlo" ~ 0.0477,
  spss_data$Municipality == "Tielt" ~ 0.0444,
  spss_data$Municipality == "Tielt-Winge" ~ 0.0481,
  spss_data$Municipality == "Tienen" ~ 0.0481,
  spss_data$Municipality == "Tongeren" ~ 0.0776,
  spss_data$Municipality == "Torhout" ~ 0.0491,
  spss_data$Municipality == "Tremelo" ~ 0.0393,
  spss_data$Municipality == "Turnhout" ~ 0.1139,
  spss_data$Municipality == "Veurne" ~ 0.0435,
  spss_data$Municipality == "Vilvoorde" ~ 0.0785,
  spss_data$Municipality == "Vleteren" ~ 0.0282,
  spss_data$Municipality == "Voeren" ~ 0.0555,
  spss_data$Municipality == "Vorselaar" ~ 0.0423,
  spss_data$Municipality == "Vosselaar" ~ 0.0481,
  spss_data$Municipality == "Waasmunster" ~ 0.0518,
  spss_data$Municipality == "Wachtebeke" ~ 0.0452,
  spss_data$Municipality == "Waregem" ~ 0.049,
  spss_data$Municipality == "Wellen" ~ 0.0368,
  spss_data$Municipality == "Wemmel" ~ 0.0714,
  spss_data$Municipality == "Wervik" ~ 0.0654,
  spss_data$Municipality == "Westerlo" ~ 0.0445,
  spss_data$Municipality == "Wetteren" ~ 0.0626,
  spss_data$Municipality == "Wevelgem" ~ 0.0423,
  spss_data$Municipality == "Wezembeek-Oppem" ~ 0.0617,
  spss_data$Municipality == "Wichelen" ~ 0.0517,
  spss_data$Municipality == "Wielsbeke" ~ 0.0397,
  spss_data$Municipality == "Wijnegem" ~ 0.0598,
  spss_data$Municipality == "Wingene" ~ 0.0289,
  spss_data$Municipality == "Willebroek" ~ 0.0798,
  spss_data$Municipality == "Wommelgem" ~ 0.0528,
  spss_data$Municipality == "Wortegem-Petegem" ~ 0.0258,
  spss_data$Municipality == "Wuustwezel" ~ 0.0448,
  spss_data$Municipality == "Zandhoven" ~ 0.0432,
  spss_data$Municipality == "Zaventem" ~ 0.0658,
  spss_data$Municipality == "Zedelgem" ~ 0.0282,
  spss_data$Municipality == "Zele" ~ 0.0598,
  spss_data$Municipality == "Zelzate" ~ 0.0807,
  spss_data$Municipality == "Zemst" ~ 0.0291,
  spss_data$Municipality == "Zoersel" ~ 0.0358,
  spss_data$Municipality == "Zonhoven" ~ 0.0447,
  spss_data$Municipality == "Zonnebeke" ~ 0.0326,
  spss_data$Municipality == "Zottegem" ~ 0.0501,
  spss_data$Municipality == "Zoutleeuw" ~ 0.0474,
  spss_data$Municipality == "Zuienkerke" ~ 0.0354,
  spss_data$Municipality == "Zulte" ~ 0.0379,
  spss_data$Municipality == "Zutendaal" ~ 0.0489,
  spss_data$Municipality == "Zwalm" ~ 0.0489,
  spss_data$Municipality == "Zwevegem" ~ 0.0432,
  spss_data$Municipality == "Zwijndrecht" ~ 0.0744,
  
  
  TRUE ~ NA_real_ 
))


########### EXPLORE MISSINGS##########
summary(spss_data)
spss_data$VAR00001 <- NULL
spss_data$Q57<- NULL




#Check if all variables are numeric
test_numerical_data <- subset(spss_data)

non_numeric_vars <- sapply(test_numerical_data, function(x) !is.numeric(x))

test_numerical_data$municipality_numeric <- as.numeric(as.factor(test_numerical_data$Municipality))


library(visdat)
vis_miss(test_numerical_data)
rows_with_missing <- !complete.cases(test_numerical_data)
print(which(rows_with_missing))

test_numerical_data<- test_numerical_data[-c(541, 547), ]
test_numerical_data<- test_numerical_data[-c(361), ]
test_numerical_data<- test_numerical_data[-c(215), ]
test_numerical_data<- test_numerical_data[-c(139), ]

vis_miss(test_numerical_data)

# Print the names of non-numeric variables
print(names(non_numeric_vars[non_numeric_vars]))

# Create a missing data plot
#aggr_plot <- aggr(test_numerical_data, col = c('navajowhite', 'lightcoral'), numbers = TRUE, sortVars = TRUE, labels = names(spss_data), cex.axis = 0.7, gap = 3)

##Check correlation in missings
#cor_matrix <- cor(test_numerical_data, use = "pairwise.complete.obs")
#print(cor_matrix)

#Chi-square test for MCAR
# Create observed table
#observed <- table(is.na(spss_data$Q55))

# Calculate expected probabilities
#expected_prob <- c(mean(is.na(spss_data$Q55)), mean(!is.na(spss_data$Q55)))

# Perform chi-square test
#chisq_result <- chisq.test(observed, p = expected_prob)
#print(chisq_result)

## explore Q55
#summary(test_numerical_data$Q55)
#class_Q55 <- class(test_numerical_data$Q55)
#print(class_Q55)
#str(test_numerical_data$Q55)
#attributes(test_numerical_data$Q55)


####### ANALYSIS#############
### 1: Factor analysis ######

## Reverse items 6/7
library(labelled)
variables_to_reverse <- c("Q59_6", "Q59_7")
test_numerical_data[, variables_to_reverse] <- lapply(test_numerical_data[, variables_to_reverse], haven::as_factor)
test_numerical_data <- test_numerical_data %>%
  mutate(across(all_of(variables_to_reverse), ~ ifelse(is.na(.), NA, nlevels(.) - as.numeric(.))))


## test > factor loadings beneden .10 niet weergegeven? 

factor_data <- test_numerical_data[,5:12]
factor_data_no_missing <- na.omit(factor_data)
 
### Model 1 - only complete data, 4 factors  ##
fa_model_1<- fa(factor_data_no_missing, nfactors = 4, rotate = "varimax")
loadings <- fa_model_1$loadings
print(loadings)

# Access eigenvalues
eigenvalues <- fa_model_1$values
print(eigenvalues)

# Calculate Cronbach's alpha
alpha_result <- alpha(factor_data_no_missing,check.keys=TRUE)
print(alpha_result)  

## Model 2 - all data, 4 factors
fa_model_2<- fa(factor_data, nfactors = 4, rotate = "varimax")
summary(fa_model_2)
loadings_2 <- fa_model_2$loadings
print(loadings_2)

# Access eigenvalues
eigenvalues <- fa_model_2$values
print(eigenvalues)

# Calculate Cronbach's alpha
alpha_result <- alpha(factor_data,check.keys=TRUE)
print(alpha_result)  
  

### Model 3 - only complete data, 1 factors  ##
fa_model_3<- fa(factor_data, nfactors = 1, rotate = "varimax")
loadings <- fa_model_3$loadings
print(loadings)

loadings <- fa_model_3$loadings
print(loadings["Q59_6", ])

# Access eigenvalues
eigenvalues <- fa_model_3$values
print(eigenvalues)


# Calculate Cronbach's alpha
alpha_result <- alpha(factor_data,check.keys=TRUE)
print(alpha_result)  




### 1a: Create DV1 / Rescale items  ####

## Drop item 3, conform original article ##
test_numerical_data$Q59_3<- NULL

# # Remove rows with missing valuesin columns Q59_1 to Q59_8
missing_rows <- is.na(test_numerical_data$Q59_1) | is.na(test_numerical_data$Q59_2) | is.na(test_numerical_data$Q59_4) | is.na(test_numerical_data$Q59_5) | is.na(test_numerical_data$Q59_6)| is.na(test_numerical_data$Q59_7)| is.na(test_numerical_data$Q59_8)
test_numerical_data <- test_numerical_data[!missing_rows, ]


# Create a new variable 'DV1' as the sum of the seven Likert-scaled items
subset_data <- subset(test_numerical_data, select = c("Q59_1", "Q59_2","Q59_4","Q59_5","Q59_6","Q59_7","Q59_8"))
test_numerical_data$DV1 <- rowMeans(subset_data)

# Perform linear transformation of DV1
summary(test_numerical_data$DV1)
test_numerical_data$DV1 <- ((test_numerical_data$DV1 -  0.7143) / (4.7143  -  0.7143)) * (100 - 0) + 0
summary(test_numerical_data$DV1)
sd(test_numerical_data$DV1)

# rescale Dv2: X*10 #
summary(test_numerical_data$Q55)

rescale_variable <- function(variable) {
  rescaled <- variable * 10
  return(rescaled)
}

test_numerical_data$Q55 <- rescale_variable(test_numerical_data$Q55)
summary(test_numerical_data$Q55)
test_numerical_data$DV2<-test_numerical_data$Q55

# rescale DV3: X*25-25 #

summary(test_numerical_data$Q58)

rescale_variable <- function(variable) {
  rescaled <- (variable * 25)-25
  return(rescaled)
}

test_numerical_data$Q58 <- rescale_variable(test_numerical_data$Q58)
summary(test_numerical_data$Q58)
test_numerical_data$DV3<-test_numerical_data$Q58
test_numerical_data$DV4<-test_numerical_data$Q57_a


#### Create dummy ideology ####
structure(test_numerical_data$Q35)
test_numerical_data$Q35D <- ifelse(test_numerical_data$Q35 >= 6 & test_numerical_data$Q35 <= 10, 1, 0)
describe(test_numerical_data$Q35D)


### Create dummy education ### 
library(Hmisc)
label(spss_data$Q39)
test_numerical_data$Q39D <- ifelse(test_numerical_data$Q39 >= 3 & test_numerical_data$Q39 <= 5, 1, 0)
describe(test_numerical_data$Q39D)

####Descriptive statistics####
summary(test_numerical_data)
View(test_numerical_data)
describe(test_numerical_data$DV1)
describe(test_numerical_data$DV2)
describe(test_numerical_data$DV3)
describe(test_numerical_data$DV4)

test_numerical_data$Q35 <- as.numeric(test_numerical_data$Q35)
attributes(test_numerical_data$Q35) <- NULL
str(test_numerical_data$Q35)
summary(test_numerical_data$Q35D)
describe(test_numerical_data$Q35)
describe(test_numerical_data$Q40_a)
describe(test_numerical_data$Deserving)
describe(test_numerical_data$Stress)
describe(test_numerical_data$Q37_a)
describe(test_numerical_data$Q39D)


sd(test_numerical_data$DV1)
sd(test_numerical_data$DV2, na.rm = TRUE)
sd(test_numerical_data$DV3, na.rm = TRUE)
sd(test_numerical_data$DV4, na.rm = TRUE)
sd(test_numerical_data$Q35D, na.rm = TRUE)
sd(test_numerical_data$Q40_a, na.rm = TRUE)
sd(test_numerical_data$Deserving, na.rm = TRUE)
sd(test_numerical_data$Stress, na.rm = TRUE)
sd(test_numerical_data$Q37_a, na.rm = TRUE)
sd(test_numerical_data$Q39D, na.rm = TRUE)



### 2: Manipulation Check ####
View(test_numerical_data)
result <- t.test(test_numerical_data$Q56 ~ test_numerical_data$Deserving, data = test_numerical_data)
print(result)

## Means for high/low deservingness 
means_tapply <- tapply(test_numerical_data$Q56, test_numerical_data$Deserving, mean)
print(means_tapply)

means_aggregate <- aggregate(Q56 ~ Deserving, data = test_numerical_data, mean)
print(means_aggregate)


### 3: Test Balance of treatments ####

## Condition A: FL_20_DO_Maria_deserving_stress
result <- t.test(belfius_type ~ FL_20_DO_Maria_deserving_stress, data = test_numerical_data)
print(result)
result <- t.test(perc_unemployed ~ FL_20_DO_Maria_deserving_stress, data = test_numerical_data)
print(result)
result <- t.test(Q40_a ~ FL_20_DO_Maria_deserving_stress, data = test_numerical_data)
print(result)
result <- t.test(Q35 ~ FL_20_DO_Maria_deserving_stress, data = test_numerical_data)
print(result)

#chi-square to test Q39 (2 categorical variables)
contingency_table <- table(test_numerical_data$Q39, test_numerical_data$FL_20_DO_Maria_deserving_stress)
chi_square_result <- chisq.test(contingency_table)
print(chi_square_result)

#chi-square to test Q37_a (2 binary variables)
contingency_table <- table(test_numerical_data$Q37_a, test_numerical_data$FL_20_DO_Maria_deserving_stress)
chi_square_result <- chisq.test(contingency_table)
print(chi_square_result)

#chi-square to test Q40_a (2 binary variables)
contingency_table <- table(test_numerical_data$Q40_a, test_numerical_data$FL_20_DO_Maria_deserving_stress)
chi_square_result <- chisq.test(contingency_table)
print(chi_square_result)



## Condition B: FL_20_DO_Maria_undeserving_stress
result <- t.test(belfius_type ~ FL_20_DO_Maria_undeserving_stress, data = test_numerical_data)
print(result)
result <- t.test(perc_unemployed ~ FL_20_DO_Maria_undeserving_stress, data = test_numerical_data)
print(result)
result <- t.test(Q40_a ~ FL_20_DO_Maria_undeserving_stress, data = test_numerical_data)
print(result)
result <- t.test(Q35 ~ FL_20_DO_Maria_undeserving_stress, data = test_numerical_data)
print(result)

#chi-square to test Q39 (2 categorical variables)
contingency_table <- table(test_numerical_data$Q39, test_numerical_data$FL_20_DO_Maria_undeserving_stress)
chi_square_result <- chisq.test(contingency_table)
print(chi_square_result)

#chi-square to test Q37_a (2 binary variables)
contingency_table <- table(test_numerical_data$Q37_a, test_numerical_data$FL_20_DO_Maria_undeserving_stress)
chi_square_result <- chisq.test(contingency_table)
print(chi_square_result)

#chi-square to test Q40_a (2 binary variables)
contingency_table <- table(test_numerical_data$Q40_a, test_numerical_data$FL_20_DO_Maria_undeserving_stress)
chi_square_result <- chisq.test(contingency_table)
print(chi_square_result)


## Condition C: FL_20_DO_Maria_undeserving_nostress
result <- t.test(belfius_type ~ FL_20_DO_Maria_undeserving_nostress, data = test_numerical_data)
print(result)
result <- t.test(perc_unemployed ~ FL_20_DO_Maria_undeserving_nostress, data = test_numerical_data)
print(result)
result <- t.test(Q40_a ~ FL_20_DO_Maria_undeserving_nostress, data = test_numerical_data)
print(result)
result <- t.test(Q35 ~ FL_20_DO_Maria_undeserving_nostress, data = test_numerical_data)
print(result)

#chi-square to test Q39 (2 categorical variables)
contingency_table <- table(test_numerical_data$Q39, test_numerical_data$FL_20_DO_Maria_undeserving_nostress)
chi_square_result <- chisq.test(contingency_table)
print(chi_square_result)

#chi-square to test Q37_a (2 binary variables)
contingency_table <- table(test_numerical_data$Q37_a, test_numerical_data$FL_20_DO_Maria_undeserving_nostress)
chi_square_result <- chisq.test(contingency_table)
print(chi_square_result)


#chi-square to test Q40_a (2 binary variables)
contingency_table <- table(test_numerical_data$Q40_a, test_numerical_data$FL_20_DO_Maria_undeserving_nostress)
chi_square_result <- chisq.test(contingency_table)
print(chi_square_result)


##Condition D: FL_20_DO_Maria_deserving_nostress
result <- t.test(belfius_type ~ FL_20_DO_Maria_deserving_nostress, data = test_numerical_data)
print(result)
result <- t.test(perc_unemployed ~ FL_20_DO_Maria_deserving_nostress, data = test_numerical_data)
print(result)
result <- t.test(Q40_a ~ FL_20_DO_Maria_deserving_nostress, data = test_numerical_data)
print(result)
result <- t.test(Q35 ~ FL_20_DO_Maria_deserving_nostress, data = test_numerical_data)
print(result)

#chi-square to test Q39 (2 categorical variables)
contingency_table <- table(test_numerical_data$Q39, test_numerical_data$FL_20_DO_Maria_deserving_nostress)
chi_square_result <- chisq.test(contingency_table)
print(chi_square_result)


#chi-square to test Q37_a (2 binary variables)
contingency_table <- table(test_numerical_data$Q37_a, test_numerical_data$FL_20_DO_Maria_deserving_nostress)
chi_square_result <- chisq.test(contingency_table)
print(chi_square_result)

#chi-square to test Q40_a (2 binary variables)
contingency_table <- table(test_numerical_data$Q40_a, test_numerical_data$FL_20_DO_Maria_deserving_nostress)
chi_square_result <- chisq.test(contingency_table)
print(chi_square_result)


#### 4: Correlation table DV's ####
cor_vars <- c("Q56", "DV1", "DV2", "DV3","DV4")
cor_data <- test_numerical_data[, cor_vars]
cor_data <- na.omit(cor_data)
cor_matrix <- rcorr(as.matrix(cor_data), type = "pearson")
print(cor_matrix)

####### 4a: Test significant difference correlations ####
# Calculate correlations
r_AB <- cor(Q56, DV1)
r_AC <- cor(Q56, Q57_a)
r_AD <- cor(Q56, Q55)
r_AE <- cor(Q56, Q58)

r_BC <- cor(DV1, Q57_a)
r_BD <- cor(DV1, Q55)
r_BE <- cor(DV1, Q58)

r_CD <- cor(Q57_a, Q55)
r_CE <- cor(Q57_a, Q58)

r_DE <- cor(Q55, Q58)

# Fisher's Z transformation
Z_AB <- 0.5 * log((1 + r_AB) / (1 - r_AB))
Z_AC <- 0.5 * log((1 + r_AC) / (1 - r_AC))
Z_AD <- 0.5 * log((1 + r_AD) / (1 - r_AD))
Z_AE <- 0.5 * log((1 + r_AE) / (1 - r_AE))

Z_BC <- 0.5 * log((1 + r_BC) / (1 - r_BC))
Z_BD <- 0.5 * log((1 + r_BD) / (1 - r_BD))
Z_BE <- 0.5 * log((1 + r_BE) / (1 - r_BE))
Z_CD <- 0.5 * log((1 + r_CD) / (1 - r_CD))
Z_CE <- 0.5 * log((1 + r_CE) / (1 - r_CE))
Z_DE <- 0.5 * log((1 + r_DE) / (1 - r_DE))


# Standard errors of Fisher's Z
N <- length(A)
SE_AB <- 1 / sqrt(N - 3)
SE_AC <- 1 / sqrt(N - 3)
SE_AD <- 1 / sqrt(N - 3)
SE_AE <- 1 / sqrt(N - 3)

SE_BC <- 1 / sqrt(N - 3)
SE_BD <- 1 / sqrt(N - 3)
SE_BE <- 1 / sqrt(N - 3)

SE_CD <- 1 / sqrt(N - 3)
SE_CE <- 1 / sqrt(N - 3)

SE_DE <- 1 / sqrt(N - 3)

# Test statistic
Z_diff1B <- (Z_AB - Z_BC) / sqrt(SE_AB^2 + SE_BC^2)
Z_diff2B <- (Z_AB - Z_BD) / sqrt(SE_AB^2 + SE_BD^2)
Z_diff3B <- (Z_AB - Z_BE) / sqrt(SE_AB^2 + SE_BE^2)
Z_diff4B <- (Z_AB - Z_CD) / sqrt(SE_AB^2 + SE_CD^2)
Z_diff5B <- (Z_AB - Z_CE) / sqrt(SE_AB^2 + SE_CE^2)
Z_diff6B <- (Z_AB - Z_DE) / sqrt(SE_AB^2 + SE_DE^2)

Z_diff1C <- (Z_AC - Z_BC) / sqrt(SE_AC^2 + SE_BC^2)
Z_diff2C <- (Z_AC - Z_BD) / sqrt(SE_AC^2 + SE_BD^2)
Z_diff3C <- (Z_AC - Z_BE) / sqrt(SE_AC^2 + SE_BE^2)
Z_diff4C <- (Z_AC - Z_CD) / sqrt(SE_AC^2 + SE_CD^2)
Z_diff5C <- (Z_AC - Z_CE) / sqrt(SE_AC^2 + SE_CE^2)
Z_diff6C <- (Z_AC - Z_DE) / sqrt(SE_AC^2 + SE_DE^2)

Z_diff1D <- (Z_AD - Z_BC) / sqrt(SE_AD^2 + SE_BC^2)
Z_diff2D <- (Z_AD - Z_BD) / sqrt(SE_AD^2 + SE_BD^2)
Z_diff3D <- (Z_AD - Z_BE) / sqrt(SE_AD^2 + SE_BE^2)
Z_diff4D <- (Z_AD - Z_CD) / sqrt(SE_AD^2 + SE_CD^2)
Z_diff5D <- (Z_AD - Z_CE) / sqrt(SE_AD^2 + SE_CE^2)
Z_diff6D <- (Z_AD - Z_DE) / sqrt(SE_AD^2 + SE_DE^2)

Z_diff1E <- (Z_AE - Z_BC) / sqrt(SE_AE^2 + SE_BC^2)
Z_diff2E <- (Z_AE - Z_BD) / sqrt(SE_AE^2 + SE_BD^2)
Z_diff3E <- (Z_AE - Z_BE) / sqrt(SE_AE^2 + SE_BE^2)
Z_diff4E <- (Z_AE - Z_CD) / sqrt(SE_AE^2 + SE_CD^2)
Z_diff5E <- (Z_AE - Z_CE) / sqrt(SE_AE^2 + SE_CE^2)
Z_diff6E <- (Z_AE - Z_DE) / sqrt(SE_AE^2 + SE_DE^2)

# Two-tailed p-value
p_value <- 2 * (1 - pnorm(abs(Z_diff)))

# Print results
cat("Correlation A-B:", round(r_AB, 3), "\n")
cat("Correlation B-C:", round(r_BC, 3), "\n")
cat("Test statistic (Z_diff):", round(Z_diff1B, 3), "\n")
cat("Two-tailed p-value:", round(p_value, 3), "\n")




# Calculate the correlation matrix using only complete cases
cor_matrix <- cor(cor_data, method = "pearson", use = "complete.obs")

# Function to calculate p-values for each correlation
get_pvalues <- function(cor_matrix, cor_data) {
  p_values <- matrix(NA, ncol = ncol(cor_matrix), nrow = ncol(cor_matrix))
  for (i in 1:(ncol(cor_matrix) - 1)) {
    for (j in (i + 1):ncol(cor_matrix)) {
      subset_data <- na.omit(cor_data[, c(i, j)])
      # Convert the subset_data to numeric to handle non-numeric variables
      subset_data <- as.matrix(subset_data)
      cor_test_result <- cor.test(subset_data[, 1], subset_data[, 2], method = "pearson")
      p_values[i, j] <- cor_test_result$p.value
      p_values[j, i] <- cor_test_result$p.value
    }
  }
  rownames(p_values) <- colnames(p_values) <- colnames(cor_matrix)
  return(p_values)
}

# Get p-values for each correlation
p_values <- get_pvalues(cor_matrix, cor_data)

# Print the correlation matrix
print(cor_matrix)

# Print the corresponding p-values
print(p_values)

num_cases <- nrow(na.omit(cor_data))
print(num_cases)

View(test_numerical_data)


####### 5: Regression Analysis Clustered ####

### Create Cluster variable####
cluster_var <- test_numerical_data$municipality_numeric

### DV1:Attitudinal Benefits ####
test_numerical_data_DV1 <- na.omit(test_numerical_data[, c("DV1", "Q35D", "Q40_a", "Deserving", "Stress", "Q37_a", "Q39D", "belfius_type", "municipality_numeric"), drop = FALSE])

# Fit the regression model
reg_model_DV1 <- lm(DV1 ~ Q35D + Q40_a + Deserving + Stress + Q37_a + Q39D + factor(belfius_type), data = na.omit(test_numerical_data_DV1))

# Calculate clustered standard errors
clustered_se <- sandwich::vcovHC(reg_model_DV1, type = "HC0", cluster = ~ cluster_var)

# Display regression results
summary(reg_model_DV1, vcov = clustered_se)
AIC(reg_model_DV1)
num_observations <- nobs(reg_model_DV1)
print(num_observations)

# Total Variance
total_variance <- (summary(reg_model_DV1)$sigma)^2

# Fitted values (predicted values)
fitted_values <- fitted(reg_model_DV1)

# Between-Cluster Variance
between_cluster_variance_DV1 <- var(fitted_values)

# Residuals
residuals <- residuals(reg_model_DV1)

# Within-Cluster Variance
within_cluster_variance_DV1 <- tapply(residuals, na.omit(test_numerical_data_DV1$municipality_numeric, var))
average_within_cluster_variance_DV1 <- mean(within_cluster_variance_DV1, na.rm = TRUE)

# R-squared Between
r_squared_between_DV1 <- between_cluster_variance_DV1 / total_variance

# R-squared Within
TSS <- sum((test_numerical_data_DV1$DV1 - mean(test_numerical_data_DV1$DV1))^2)
print(TSS)
WSS <- tapply((test_numerical_data_DV1$DV1 - ave(test_numerical_data_DV1$DV1, test_numerical_data_DV1$municipality_numeric, FUN = mean))^2, test_numerical_data_DV1$municipality_numeric, sum)
WSS <- sum(WSS, na.rm = TRUE) 

length(test_numerical_data_DV1$DV1)
length(test_numerical_data_DV1$municipality_numeric)

print(TSS)
print(WSS)
R_squared_within <- 1 - (WSS / TSS)

# Print the results
print(paste("R-squared Between:", round(r_squared_between_DV1, 4)))
print(paste("R-squared Within:", round(R_squared_within, 4)))



#### Attitudinal Fairness of Burdensome Policies #####
test_numerical_data_DV2 <- na.omit(test_numerical_data[, c("DV2", "Q35D", "Q40_a", "Deserving", "Stress", "Q37_a", "Q39D", "belfius_type", "municipality_numeric"), drop = FALSE])

reg_model_DV2 <- lm(DV2 ~ Q35D + Q40_a + Deserving + Stress+ Q37_a + Q39D +factor(belfius_type), data = test_numerical_data_DV2)
# Calculate clustered standard errors
clustered_se <- sandwich::vcovHC(reg_model_DV2, type = "HC0", cluster = ~ cluster_var)
# Display regression results
summary(reg_model_DV2, vcov = clustered_se)

num_observations <- nobs(reg_model_DV2)
print(num_observations)

# Total Variance
total_variance_DV2 <- (summary(reg_model_DV2)$sigma)^2

# Fitted values (predicted values)
fitted_values_DV2 <- fitted(reg_model_DV2)

# Between-Cluster Variance
between_cluster_variance_DV2 <- var(fitted_values_DV2)

# Residuals
residuals_DV2 <- residuals(reg_model_DV2)

# Within-Cluster Variance
within_cluster_variance_DV2 <- tapply(residuals_DV2, test_numerical_data_DV2$municipality_numeric, var)
average_within_cluster_variance_DV2 <- mean(within_cluster_variance_DV2, na.rm = TRUE)

# R-squared Between
r_squared_between_DV2 <- between_cluster_variance_DV2 / total_variance_DV2

# R-squared Within
r_squared_within_DV2 <- average_within_cluster_variance_DV2 / total_variance_DV2

# Print the results
print(paste("R-squared Between:", round(r_squared_between_DV2, 4)))
print(paste("R-squared Within:", round(r_squared_within_DV2, 4)))


#### Willingness to use #####
test_numerical_data_DV3 <- na.omit(test_numerical_data[, c("DV3", "Q35D", "Q40_a", "Deserving", "Stress", "Q37_a", "Q39D", "belfius_type", "municipality_numeric"), drop = FALSE])

reg_model_DV3 <- lm(DV3 ~ Q35D + Q40_a + Deserving + Stress+ Q37_a + Q39D +factor(belfius_type), data = test_numerical_data_DV3)
# Calculate clustered standard errors
clustered_se <- sandwich::vcovHC(reg_model_DV3, type = "HC0", cluster = ~ cluster_var)
# Display regression results
summary(reg_model_DV3, vcov = clustered_se)

num_observations <- nobs(reg_model_DV3)
print(num_observations)

# Total Variance
total_variance_DV3 <- (summary(reg_model_DV3)$sigma)^2

# Fitted values (predicted values)
fitted_values_DV3 <- fitted(reg_model_DV3)

# Between-Cluster Variance
between_cluster_variance_DV3<- var(fitted_values_DV3)

# Residuals
residuals_DV3 <- residuals(reg_model_DV3)

# Within-Cluster Variance
within_cluster_variance_DV3 <- tapply(residuals_DV3, test_numerical_data_DV3$municipality_numeric, var)
average_within_cluster_variance_DV3 <- mean(within_cluster_variance_DV3, na.rm = TRUE)

# R-squared Between
r_squared_between_DV3 <- between_cluster_variance_DV3 / total_variance_DV3

# R-squared Within
r_squared_within_DV3 <- average_within_cluster_variance_DV3 / total_variance_DV3

# Print the results
print(paste("R-squared Between:", round(r_squared_between_DV3, 4)))
print(paste("R-squared Within:", round(r_squared_within_DV3, 4)))



#### DV4 Weekly hours assigned #####
test_numerical_data_DV4 <- na.omit(test_numerical_data[, c("Q57_a", "Q35D", "Q40_a", "Deserving", "Stress", "Q37_a", "Q39D", "belfius_type", "municipality_numeric"), drop = FALSE])


reg_model_DV4 <- lm(Q57_a ~ Q35D + Q40_a + Deserving + Stress+ Q37_a + Q39D +factor(belfius_type), data = test_numerical_data_DV4)
# Calculate clustered standard errors
clustered_se <- sandwich::vcovHC(reg_model_DV4, type = "HC0", cluster = ~ cluster_var)

num_observations <- nobs(reg_model_DV4)
print(num_observations)

# Display regression results
summary(reg_model_DV4, vcov = clustered_se)

# Total Variance
total_variance_DV4 <- (summary(reg_model_DV4)$sigma)^2

# Fitted values (predicted values)
fitted_values <- fitted(reg_model_DV4)

# Between-Cluster Variance
between_cluster_variance_DV4 <- var(fitted_values)

# Residuals
residuals_DV4 <- residuals(reg_model_DV4)

# Within-Cluster Variance
within_cluster_variance_DV4 <- tapply(residuals_DV4, test_numerical_data_DV4$municipality_numeric, var)
average_within_cluster_variance_DV4 <- mean(within_cluster_variance_DV4, na.rm = TRUE)

# R-squared Between
r_squared_between <- between_cluster_variance_DV4 / total_variance_DV4

# R-squared Within
r_squared_within <- average_within_cluster_variance_DV4 / total_variance_DV4

# Print the results
print(paste("R-squared Between:", round(r_squared_between, 4)))
print(paste("R-squared Within:", round(r_squared_within, 4)))

